In [ ]:
import pandas as pd
import plotly.express as px
# Show every column when a DataFrame is rendered (no column truncation).
pd.set_option("display.max_columns", None)

from src.ctgov.lib.helpers import protocol_feature_query, _get_us_locations
from src.ctgov.lib.visuals import draw_org_fragmentation

# Pull the desired protocol-level features from clinicaltrials.gov,
# restricted to Phase 2 and Phase 3 trials, then display the resulting frame.
protocol_features = protocol_feature_query(
    phases=['Phase 2', 'Phase 3'],
)
protocol_features
Out[ ]:
_trial_id _org_name _org_class _sponsor_name _sponsor_class _collaborator _condition _location _eligibility _status _arm _random _enrollment _phase _location_count _arm_count _start_yr _end_yr _last_yr _enrollment_z _location_count_z _arm_count_z
0 NCT04834349 M.D. Anderson Cancer Center OTHER M.D. Anderson Cancer Center OTHER None [Recurrent Head and Neck Squamous Cell Carcino... [{'LocationCity': 'Houston', 'LocationContactL... Inclusion Criteria:\n\nPatients with biopsy pr... Recruiting [{'ArmGroupDescription': 'Patients receive NBT... Non-Randomized 80.0 Phase 2 1.0 2.0 2021.0 2025.0 2021 -0.044914 -0.197093 -0.048174
1 NCT04838444 Valneva Austria GmbH INDUSTRY Valneva Austria GmbH INDUSTRY None [Chikungunya Virus Infection] [{'LocationCity': 'Phoenix', 'LocationContactL... Inclusion Criteria:\n\nSubject participated in... Enrolling by invitation [{'ArmGroupDescription': None, 'ArmGroupInterv... N/A 375.0 Phase 3 11.0 1.0 2021.0 2025.0 2021 0.002014 0.138348 -0.826292
2 NCT04837820 Memorial Sloan Kettering Cancer Center OTHER Memorial Sloan Kettering Cancer Center OTHER None [Breast Cancer] [{'LocationCity': 'Commack', 'LocationContactL... Inclusion Criteria:\n\nEnglish-proficient adul... Recruiting [{'ArmGroupDescription': 'The intervention wil... Randomized 260.0 Phase 2 4.0 3.0 2021.0 2025.0 2021 -0.016280 -0.096461 0.729943
3 NCT04837508 Shanghai Miracogen Inc. INDUSTRY Shanghai Miracogen Inc. INDUSTRY None [Advanced or Metastatic Biliary Tract Cancer] [{'LocationCity': 'Bengbu', 'LocationContactLi... Inclusion Criteria:\n\nWilling to sign the ICF... Recruiting [{'ArmGroupDescription': 'MRG002 will be admin... N/A 86.0 Phase 2 7.0 1.0 2021.0 2022.0 2021 -0.043959 0.004172 -0.826292
4 NCT04830449 Hanmi Pharmaceutical Company Limited INDUSTRY Hanmi Pharmaceutical Company Limited INDUSTRY None [Hypertension] [{'LocationCity': 'Seoul', 'LocationContactLis... Inclusion Criteria:\n\nPatients over 18 years ... Recruiting [{'ArmGroupDescription': None, 'ArmGroupInterv... Randomized 116.0 Phase 3 1.0 2.0 2020.0 2021.0 2021 -0.039187 -0.197093 -0.048174
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88317 NCT00553072 Sheri Kashmir Institute of Medical Sciences OTHER Sheri Kashmir Institute of Medical Sciences OTHER None [Perinatal Asphyxia , Moderate to Severe HIE] [{'LocationCity': 'Srinagar', 'LocationContact... Inclusion Criteria:\n\nBabies eligible for the... Completed [{'ArmGroupDescription': 'Magnesium sulphate 2... Randomized 40.0 Phase 3 1.0 2.0 2004.0 2006.0 2016 -0.075429 -0.172691 -0.059196
88318 NCT00554164 University Hospital, Essen OTHER University Hospital, Essen OTHER [{'CollaboratorClass': 'OTHER', 'CollaboratorN... [Lymphoma, High-grade] [{'LocationCity': 'Essen', 'LocationContactLis... Inclusion Criteria:\n\nAggressive B-cell or T-... Completed [{'ArmGroupDescription': 'Six cycles of the (R... Randomized 1073.0 Phase 3 1.0 4.0 2007.0 2017.0 2017 0.156424 -0.172691 1.421984
88319 NCT00554723 CHIMES Society OTHER CHIMES Society OTHER [{'CollaboratorClass': 'OTHER_GOV', 'Collabora... [Cerebral Infarction, Stroke] [{'LocationCity': 'Hong Kong', 'LocationContac... Inclusion Criteria:\n\nSubject is aged 18 year... Completed [{'ArmGroupDescription': 'NeuroAid', 'ArmGroup... Randomized 1100.0 Phase 3 24.0 2.0 2007.0 2012.0 2014 0.162484 0.470072 -0.059196
88320 NCT00559845 Hoffmann-La Roche INDUSTRY Hoffmann-La Roche INDUSTRY None [Breast Cancer] [{'LocationCity': 'Napoli', 'LocationContactLi... Inclusion Criteria:\n\nfemale participants, >=... Completed [{'ArmGroupDescription': 'Participants will re... N/A 56.0 Phase 2 8.0 1.0 2008.0 2015.0 2017 -0.071838 0.022932 -0.799786
88321 NCT00558311 Idorsia Pharmaceuticals Ltd. INDUSTRY Idorsia Pharmaceuticals Ltd. INDUSTRY None [Aneurysmal Subarachnoid Hemorrhage] [{'LocationCity': 'Phoenix', 'LocationContactL... Inclusion Criteria:\n\nMales and females aged ... Completed [{'ArmGroupDescription': 'A continuous intrave... Randomized 1157.0 Phase 3 117.0 2.0 2007.0 2010.0 2020 0.175277 3.069070 -0.059196

88322 rows × 22 columns

Organization Fragmentation¶

In [ ]:
# Summarise how trials are spread across organizations. In the saved output this
# call displays a "study count by org" table (one row per _org_class/_org_name
# with study_count, _pct and _cumm_pct columns).
# NOTE(review): any figure the helper may also draw is not visible here — confirm in src.ctgov.lib.visuals.
draw_org_fragmentation(protocol_features)
'study count by org'
_org_class _org_name study_count _pct _cumm_pct
0 INDUSTRY Novartis 1804 0.020425 0.020425
1 INDUSTRY GlaxoSmithKline 1597 0.018082 0.038507
2 NIH National Cancer Institute (NCI) 1494 0.016915 0.055422
3 INDUSTRY Pfizer 1216 0.013768 0.069190
4 INDUSTRY Sanofi 1129 0.012783 0.081973
... ... ... ... ... ...
9024 OTHER Hospital Infantil Universitario Niño Jesús, Ma... 1 0.000011 0.999955
9025 INDUSTRY Neurelis, Inc. 1 0.000011 0.999966
9026 INDUSTRY Neuraptive Therapeutics Inc. 1 0.000011 0.999977
9027 OTHER Hospital Mateo Orfila 1 0.000011 0.999989
9028 FED 375th Medical Group, Scott Air Force Base 1 0.000011 1.000000

9029 rows × 5 columns

_workbench¶

Location Fragmentation¶

Where do these trials take place?

In [ ]:
# Build the US-only location table from the protocol features.
# NOTE(review): presumably one row per (trial, site) pair, judging by the
# _trial_id / LocationFacility columns in the preview — confirm in the helper.
location = _get_us_locations(protocol_features)

# Preview a few rows. The seed is pinned so that Restart & Run All shows the
# same sample every time instead of a different random draw per execution.
location.sample(5, random_state=42)
Out[ ]:
LocationCity LocationFacility LocationState LocationStatus LocationZip index _trial_id _location_count _phase _org_name _state _zip5 adi_median _lat _lng _census_total
29482 Kalispell Local Institution Montana Withdrawn 59901 76278 NCT04186871 112.0 Phase 2 Bristol-Myers Squibb MT 59901 40.0 48.230565 -114.403313 49693
18425 Commack Memorial Sloan Kettering Commack (All Protocol... New York Recruiting 11725 52611 NCT04757363 7.0 Phase 2 Memorial Sloan Kettering Cancer Center NY 11725 12.0 40.840584 -73.280817 29150
12727 Scottsdale Nimbus site 132 Arizona Recruiting 85255 72395 NCT04999839 56.0 Phase 2 Nimbus Therapeutics AZ 85255 6.0 33.668188 -111.822512 37270
10856 Duarte City of Hope Comprehensive Cancer Center California Not yet recruiting 91010 53317 NCT05181540 8.0 Phase 3 Angiocrine Bioscience CA 91010 19.0 34.141013 -117.958661 26074
6326 Rochester Mayo Clinic Minnesota Recruiting 55905 61640 NCT03995901 17.0 Phase 3 Talaris Therapeutics Inc. MN 55905 65.0 44.055948 -92.525906 0
In [ ]:
# Distinct trial phases present in the location table (kept as an array for reuse).
phases = location["_phase"].unique()

# Human-readable phase list for the title. Interpolating the raw ndarray would
# put its repr (e.g. "['Phase 2' 'Phase 3']") into the figure title; missing
# values are dropped so they do not show up as "nan".
phase_label = ", ".join(sorted(map(str, location["_phase"].dropna().unique())))

# A site is treated as unique by its facility name plus ZIP code.
unique_location = location[['LocationFacility', 'LocationZip']].drop_duplicates()

# Treemap nested as recruitment status -> state -> city.
# NOTE(review): no `values=` is passed, so sectors appear to be sized by the
# number of rows in `location` rather than distinct trials — confirm that the
# "Scaled by Study Count" wording in the title is what is intended.
px.treemap(
    location,
    path=['LocationStatus', 'LocationState', 'LocationCity'],
    height=600,
    title=(
        "Trial Locations by Status, State, City | Scaled by Study Count | "
        f"{len(unique_location)} Unique Locations | Trial Phases: {phase_label}"
    ),
).show(renderer="notebook")
In [ ]:
# Count distinct trials per facility/coordinate combination, busiest sites first.
# Rows with a missing facility or coordinate are dropped by groupby's default
# NaN-key handling, so only mappable sites remain.
location_counts = (
    location
    .groupby(['LocationFacility', '_lat', '_lng'])
    .agg(study_count=("_trial_id", "nunique"))
    .sort_values("study_count", ascending=False)
    .reset_index()
)

# Bubble map of sites, marker size proportional to the number of distinct trials.
# An explicit token-free basemap is requested: the default Mapbox style needs an
# access token, and none is configured in this notebook, so the background map
# would otherwise fail to render.
px.scatter_mapbox(
    location_counts,
    lat="_lat",
    lon="_lng",
    size="study_count",
    title="Where do these studies take place?",
    hover_data=['LocationFacility'],
    height=700,
    zoom=3,
    mapbox_style="open-street-map",
).show(renderer="notebook")